Lab 7

Author

Jonathan Coombs

Setup

Code
# install.packages("gganimate")
# install.packages("gifski")
library(tidyverse)
library(lubridate)
library(gganimate)
library(gifski)
theme_set(theme_bw())
Code
 download.file(url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv", 
               destfile = "data/time_series_covid19_confirmed_global.csv")
Code
time_series_confirmed <- read_csv("data/time_series_covid19_confirmed_global.csv")|>
  rename(Province_State = "Province/State", Country_Region = "Country/Region")

Examples

Code
time_series_confirmed_long <- time_series_confirmed |> 
               pivot_longer(-c(Province_State, Country_Region, Lat, Long),
                            names_to = "Date", values_to = "Confirmed") 
time_series_confirmed_long$Date <- mdy(time_series_confirmed_long$Date)
time_series_confirmed_long|> 
  group_by(Country_Region, Date) |> 
  summarise(Confirmed = sum(Confirmed)) |> 
  filter (Country_Region == "US") |> 
  ggplot(aes(x = Date,  y = Confirmed)) + 
    geom_point() +
    geom_line() +
    ggtitle("US COVID-19 Confirmed Cases")

Code
time_series_confirmed_long |> 
    group_by(Country_Region, Date) |> 
    summarise(Confirmed = sum(Confirmed)) |> 
    filter (Country_Region %in% c("China","France","Italy", 
                                "Korea, South", "US")) |> 
    ggplot(aes(x = Date,  y = Confirmed, color = Country_Region)) + 
      geom_point() +
      geom_line() +
      ggtitle("COVID-19 Confirmed Cases")

Code
time_series_confirmed_long_daily <-time_series_confirmed_long |> 
    group_by(Country_Region, Date) |> 
    summarise(Confirmed = sum(Confirmed)) |> 
    mutate(Daily = Confirmed - lag(Confirmed, default = first(Confirmed )))
time_series_confirmed_long_daily |> 
    filter (Country_Region == "US") |> 
    ggplot(aes(x = Date,  y = Daily, color = Country_Region)) + 
      geom_point() +
      ggtitle("COVID-19 Confirmed Cases")

Code
time_series_confirmed_long_daily |> 
    filter (Country_Region == "US") |> 
    ggplot(aes(x = Date,  y = Daily, color = Country_Region)) + 
      geom_line() +
      ggtitle("COVID-19 Confirmed Cases")

Code
time_series_confirmed_long_daily |> 
    filter (Country_Region == "US") |> 
    ggplot(aes(x = Date,  y = Daily, color = Country_Region)) + 
      geom_smooth() +
      ggtitle("COVID-19 Confirmed Cases")

Code
time_series_confirmed_long_daily |> 
    filter (Country_Region == "US") |> 
    ggplot(aes(x = Date,  y = Daily, color = Country_Region)) + 
      geom_smooth(method = "gam", se = FALSE) +
      ggtitle("COVID-19 Confirmed Cases")

Code
daily_counts <- time_series_confirmed_long_daily |> 
      filter (Country_Region == "US")

p <- ggplot(daily_counts, aes(x = Date,  y = Daily, color = Country_Region)) + 
        geom_point() +
        ggtitle("Confirmed COVID-19 Cases") +
# gganimate lines  
        geom_point(aes(group = seq_along(Date))) +
        transition_reveal(Date) 

# make the animation
 animate(p, renderer = gifski_renderer(), end_pause = 15)

Code
 anim_save("daily_counts_US.gif", p)
Code
# This download may take about 5 minutes. You only need to do this once so set `#| eval: false` in your qmd file
download.file(url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv", 
  destfile = "data/time_series_covid19_deaths_global.csv")
Code
time_series_deaths_confirmed <- read_csv("data/time_series_covid19_deaths_global.csv")|>
  rename(Province_State = "Province/State", Country_Region = "Country/Region")

time_series_deaths_long <- time_series_deaths_confirmed |> 
    pivot_longer(-c(Province_State, Country_Region, Lat, Long),
        names_to = "Date", values_to = "Confirmed") 

time_series_deaths_long$Date <- mdy(time_series_deaths_long$Date)
p <- time_series_deaths_long |>
  filter (Country_Region %in% c("US","Canada", "Mexico","Brazil","Egypt","Ecuador","India", "Netherlands", "Germany", "China" )) |>
  ggplot(aes(x=Country_Region, y=Confirmed, color= Country_Region)) + 
    geom_point(aes(size=Confirmed)) + 
    transition_time(Date) + 
    labs(title = "Cumulative Deaths: {frame_time}") + 
    ylab("Deaths") +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
# make the animation
animate(p, renderer = gifski_renderer(), end_pause = 15)

Exercises

1

Examples

Code
library(tibble)

table1 <- tibble(
  country = c("Afghanistan", "Afghanistan", "Brazil", "Brazil", "China", "China"),
  year = c(1999, 2000, 1999, 2000, 1999, 2000),
  cases = c(745, 2666, 37737, 80488, 212258, 213766),
  population = c(19987071, 20595360, 172006362, 174504898, 1272915272, 1280428583)
)

library(tidyr)

table2 <- table1 %>%
  pivot_longer(cols = c(cases, population), 
               names_to = "type", 
               values_to = "count")

table3 <- table1 %>%
  mutate(rate = paste(cases, population, sep = "/")) %>%
  select(country, year, rate)

table1
# A tibble: 6 × 4
  country      year  cases population
  <chr>       <dbl>  <dbl>      <dbl>
1 Afghanistan  1999    745   19987071
2 Afghanistan  2000   2666   20595360
3 Brazil       1999  37737  172006362
4 Brazil       2000  80488  174504898
5 China        1999 212258 1272915272
6 China        2000 213766 1280428583
Code
table2
# A tibble: 12 × 4
   country      year type            count
   <chr>       <dbl> <chr>           <dbl>
 1 Afghanistan  1999 cases             745
 2 Afghanistan  1999 population   19987071
 3 Afghanistan  2000 cases            2666
 4 Afghanistan  2000 population   20595360
 5 Brazil       1999 cases           37737
 6 Brazil       1999 population  172006362
 7 Brazil       2000 cases           80488
 8 Brazil       2000 population  174504898
 9 China        1999 cases          212258
10 China        1999 population 1272915272
11 China        2000 cases          213766
12 China        2000 population 1280428583
Code
table3
# A tibble: 6 × 3
  country      year rate             
  <chr>       <dbl> <chr>            
1 Afghanistan  1999 745/19987071     
2 Afghanistan  2000 2666/20595360    
3 Brazil       1999 37737/172006362  
4 Brazil       2000 80488/174504898  
5 China        1999 212258/1272915272
6 China        2000 213766/1280428583
Code
# Compute rate per 10,000
table1 |>
  mutate(rate = cases / population * 10000)
# A tibble: 6 × 5
  country      year  cases population  rate
  <chr>       <dbl>  <dbl>      <dbl> <dbl>
1 Afghanistan  1999    745   19987071 0.373
2 Afghanistan  2000   2666   20595360 1.29 
3 Brazil       1999  37737  172006362 2.19 
4 Brazil       2000  80488  174504898 4.61 
5 China        1999 212258 1272915272 1.67 
6 China        2000 213766 1280428583 1.67 
Code
# Compute total cases per year
table1 |> 
  group_by(year) |> 
  summarize(total_cases = sum(cases))
# A tibble: 2 × 2
   year total_cases
  <dbl>       <dbl>
1  1999      250740
2  2000      296920
Code
# Visualize changes over time
ggplot(table1, aes(x = year, y = cases)) +
  geom_line(aes(group = country), color = "grey50") +
  geom_point(aes(color = country, shape = country)) +
  scale_x_continuous(breaks = c(1999, 2000)) # x-axis breaks at 1999 and 2000

5.2.1

  1. In table 1, each observation is a unique country/year pair. The columns are country, year, the number of TB cases, and the population of that country in that year to compare against the number of TB cases. In table 2, observations are changed to be each unique country/year/data type (population vs cases) combination, and while country and year remain as columns, the last two columns are changed to the data type (case or population) and the count for that data. In table 3, observations are identical to those in table 1, with the sole change being compressing the cases and population columns into a single rate column equal to the # of cases dividided by the population.

  2. In table 2, I’d extract the number of cases per country per year from the count column (odd row numbers), divide this by the population per country per year also in the count column (even row numbers, specifically row i+1 where row i is where the # of cases is taken from) and multiply by 10,000. I would store these rates in new columns with type “rate”, such that each country/year pair would have 3 rows. (This would be an inefficient way to store the data, but consistent with the table’s design.) In table 3, I’d extract the “rate” column, convert the string to a numerical value (i.e. go from “745/19987071” to the value 745/19987071), multiply by 10,000, and mutate the table to add a new column “rate per 10,000” with this value.

Examples

Code
billboard
# A tibble: 317 × 79
   artist     track date.entered   wk1   wk2   wk3   wk4   wk5   wk6   wk7   wk8
   <chr>      <chr> <date>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 2 Pac      Baby… 2000-02-26      87    82    72    77    87    94    99    NA
 2 2Ge+her    The … 2000-09-02      91    87    92    NA    NA    NA    NA    NA
 3 3 Doors D… Kryp… 2000-04-08      81    70    68    67    66    57    54    53
 4 3 Doors D… Loser 2000-10-21      76    76    72    69    67    65    55    59
 5 504 Boyz   Wobb… 2000-04-15      57    34    25    17    17    31    36    49
 6 98^0       Give… 2000-08-19      51    39    34    26    26    19     2     2
 7 A*Teens    Danc… 2000-07-08      97    97    96    95   100    NA    NA    NA
 8 Aaliyah    I Do… 2000-01-29      84    62    51    41    38    35    35    38
 9 Aaliyah    Try … 2000-03-18      59    53    38    28    21    18    16    14
10 Adams, Yo… Open… 2000-08-26      76    76    74    69    68    67    61    58
# ℹ 307 more rows
# ℹ 68 more variables: wk9 <dbl>, wk10 <dbl>, wk11 <dbl>, wk12 <dbl>,
#   wk13 <dbl>, wk14 <dbl>, wk15 <dbl>, wk16 <dbl>, wk17 <dbl>, wk18 <dbl>,
#   wk19 <dbl>, wk20 <dbl>, wk21 <dbl>, wk22 <dbl>, wk23 <dbl>, wk24 <dbl>,
#   wk25 <dbl>, wk26 <dbl>, wk27 <dbl>, wk28 <dbl>, wk29 <dbl>, wk30 <dbl>,
#   wk31 <dbl>, wk32 <dbl>, wk33 <dbl>, wk34 <dbl>, wk35 <dbl>, wk36 <dbl>,
#   wk37 <dbl>, wk38 <dbl>, wk39 <dbl>, wk40 <dbl>, wk41 <dbl>, wk42 <dbl>, …
Code
billboard |> 
  pivot_longer(
    cols = starts_with("wk"), 
    names_to = "week", 
    values_to = "rank"
  )
# A tibble: 24,092 × 5
   artist track                   date.entered week   rank
   <chr>  <chr>                   <date>       <chr> <dbl>
 1 2 Pac  Baby Don't Cry (Keep... 2000-02-26   wk1      87
 2 2 Pac  Baby Don't Cry (Keep... 2000-02-26   wk2      82
 3 2 Pac  Baby Don't Cry (Keep... 2000-02-26   wk3      72
 4 2 Pac  Baby Don't Cry (Keep... 2000-02-26   wk4      77
 5 2 Pac  Baby Don't Cry (Keep... 2000-02-26   wk5      87
 6 2 Pac  Baby Don't Cry (Keep... 2000-02-26   wk6      94
 7 2 Pac  Baby Don't Cry (Keep... 2000-02-26   wk7      99
 8 2 Pac  Baby Don't Cry (Keep... 2000-02-26   wk8      NA
 9 2 Pac  Baby Don't Cry (Keep... 2000-02-26   wk9      NA
10 2 Pac  Baby Don't Cry (Keep... 2000-02-26   wk10     NA
# ℹ 24,082 more rows
Code
billboard |> 
  pivot_longer(
    cols = starts_with("wk"), 
    names_to = "week", 
    values_to = "rank",
    values_drop_na = TRUE
  )
# A tibble: 5,307 × 5
   artist  track                   date.entered week   rank
   <chr>   <chr>                   <date>       <chr> <dbl>
 1 2 Pac   Baby Don't Cry (Keep... 2000-02-26   wk1      87
 2 2 Pac   Baby Don't Cry (Keep... 2000-02-26   wk2      82
 3 2 Pac   Baby Don't Cry (Keep... 2000-02-26   wk3      72
 4 2 Pac   Baby Don't Cry (Keep... 2000-02-26   wk4      77
 5 2 Pac   Baby Don't Cry (Keep... 2000-02-26   wk5      87
 6 2 Pac   Baby Don't Cry (Keep... 2000-02-26   wk6      94
 7 2 Pac   Baby Don't Cry (Keep... 2000-02-26   wk7      99
 8 2Ge+her The Hardest Part Of ... 2000-09-02   wk1      91
 9 2Ge+her The Hardest Part Of ... 2000-09-02   wk2      87
10 2Ge+her The Hardest Part Of ... 2000-09-02   wk3      92
# ℹ 5,297 more rows
Code
billboard_longer <- billboard |> 
  pivot_longer(
    cols = starts_with("wk"), 
    names_to = "week", 
    values_to = "rank",
    values_drop_na = TRUE
  ) |> 
  mutate(
    week = parse_number(week)
  )
billboard_longer
# A tibble: 5,307 × 5
   artist  track                   date.entered  week  rank
   <chr>   <chr>                   <date>       <dbl> <dbl>
 1 2 Pac   Baby Don't Cry (Keep... 2000-02-26       1    87
 2 2 Pac   Baby Don't Cry (Keep... 2000-02-26       2    82
 3 2 Pac   Baby Don't Cry (Keep... 2000-02-26       3    72
 4 2 Pac   Baby Don't Cry (Keep... 2000-02-26       4    77
 5 2 Pac   Baby Don't Cry (Keep... 2000-02-26       5    87
 6 2 Pac   Baby Don't Cry (Keep... 2000-02-26       6    94
 7 2 Pac   Baby Don't Cry (Keep... 2000-02-26       7    99
 8 2Ge+her The Hardest Part Of ... 2000-09-02       1    91
 9 2Ge+her The Hardest Part Of ... 2000-09-02       2    87
10 2Ge+her The Hardest Part Of ... 2000-09-02       3    92
# ℹ 5,297 more rows
Code
billboard_longer |> 
  ggplot(aes(x = week, y = rank, group = track)) + 
  geom_line(alpha = 0.25) + 
  scale_y_reverse()

Code
df <- tribble(
  ~id,  ~bp1, ~bp2,
   "A",  100,  120,
   "B",  140,  115,
   "C",  120,  125
)
df |> 
  pivot_longer(
    cols = bp1:bp2,
    names_to = "measurement",
    values_to = "value"
  )
# A tibble: 6 × 3
  id    measurement value
  <chr> <chr>       <dbl>
1 A     bp1           100
2 A     bp2           120
3 B     bp1           140
4 B     bp2           115
5 C     bp1           120
6 C     bp2           125
Code
who2
# A tibble: 7,240 × 58
   country      year sp_m_014 sp_m_1524 sp_m_2534 sp_m_3544 sp_m_4554 sp_m_5564
   <chr>       <dbl>    <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
 1 Afghanistan  1980       NA        NA        NA        NA        NA        NA
 2 Afghanistan  1981       NA        NA        NA        NA        NA        NA
 3 Afghanistan  1982       NA        NA        NA        NA        NA        NA
 4 Afghanistan  1983       NA        NA        NA        NA        NA        NA
 5 Afghanistan  1984       NA        NA        NA        NA        NA        NA
 6 Afghanistan  1985       NA        NA        NA        NA        NA        NA
 7 Afghanistan  1986       NA        NA        NA        NA        NA        NA
 8 Afghanistan  1987       NA        NA        NA        NA        NA        NA
 9 Afghanistan  1988       NA        NA        NA        NA        NA        NA
10 Afghanistan  1989       NA        NA        NA        NA        NA        NA
# ℹ 7,230 more rows
# ℹ 50 more variables: sp_m_65 <dbl>, sp_f_014 <dbl>, sp_f_1524 <dbl>,
#   sp_f_2534 <dbl>, sp_f_3544 <dbl>, sp_f_4554 <dbl>, sp_f_5564 <dbl>,
#   sp_f_65 <dbl>, sn_m_014 <dbl>, sn_m_1524 <dbl>, sn_m_2534 <dbl>,
#   sn_m_3544 <dbl>, sn_m_4554 <dbl>, sn_m_5564 <dbl>, sn_m_65 <dbl>,
#   sn_f_014 <dbl>, sn_f_1524 <dbl>, sn_f_2534 <dbl>, sn_f_3544 <dbl>,
#   sn_f_4554 <dbl>, sn_f_5564 <dbl>, sn_f_65 <dbl>, ep_m_014 <dbl>, …
Code
who2 |> 
  pivot_longer(
    cols = !(country:year),
    names_to = c("diagnosis", "gender", "age"), 
    names_sep = "_",
    values_to = "count"
  )
# A tibble: 405,440 × 6
   country      year diagnosis gender age   count
   <chr>       <dbl> <chr>     <chr>  <chr> <dbl>
 1 Afghanistan  1980 sp        m      014      NA
 2 Afghanistan  1980 sp        m      1524     NA
 3 Afghanistan  1980 sp        m      2534     NA
 4 Afghanistan  1980 sp        m      3544     NA
 5 Afghanistan  1980 sp        m      4554     NA
 6 Afghanistan  1980 sp        m      5564     NA
 7 Afghanistan  1980 sp        m      65       NA
 8 Afghanistan  1980 sp        f      014      NA
 9 Afghanistan  1980 sp        f      1524     NA
10 Afghanistan  1980 sp        f      2534     NA
# ℹ 405,430 more rows
Code
household
# A tibble: 5 × 5
  family dob_child1 dob_child2 name_child1 name_child2
   <int> <date>     <date>     <chr>       <chr>      
1      1 1998-11-26 2000-01-29 Susan       Jose       
2      2 1996-06-22 NA         Mark        <NA>       
3      3 2002-07-11 2004-04-05 Sam         Seth       
4      4 2004-10-10 2009-08-27 Craig       Khai       
5      5 2000-12-05 2005-02-28 Parker      Gracie     
Code
household |> 
  pivot_longer(
    cols = !family, 
    names_to = c(".value", "child"), 
    names_sep = "_", 
    values_drop_na = TRUE
  )
# A tibble: 9 × 4
  family child  dob        name  
   <int> <chr>  <date>     <chr> 
1      1 child1 1998-11-26 Susan 
2      1 child2 2000-01-29 Jose  
3      2 child1 1996-06-22 Mark  
4      3 child1 2002-07-11 Sam   
5      3 child2 2004-04-05 Seth  
6      4 child1 2004-10-10 Craig 
7      4 child2 2009-08-27 Khai  
8      5 child1 2000-12-05 Parker
9      5 child2 2005-02-28 Gracie
Code
cms_patient_experience
# A tibble: 500 × 5
   org_pac_id org_nm                           measure_cd measure_title prf_rate
   <chr>      <chr>                            <chr>      <chr>            <dbl>
 1 0446157747 USC CARE MEDICAL GROUP INC       CAHPS_GRP… CAHPS for MI…       63
 2 0446157747 USC CARE MEDICAL GROUP INC       CAHPS_GRP… CAHPS for MI…       87
 3 0446157747 USC CARE MEDICAL GROUP INC       CAHPS_GRP… CAHPS for MI…       86
 4 0446157747 USC CARE MEDICAL GROUP INC       CAHPS_GRP… CAHPS for MI…       57
 5 0446157747 USC CARE MEDICAL GROUP INC       CAHPS_GRP… CAHPS for MI…       85
 6 0446157747 USC CARE MEDICAL GROUP INC       CAHPS_GRP… CAHPS for MI…       24
 7 0446162697 ASSOCIATION OF UNIVERSITY PHYSI… CAHPS_GRP… CAHPS for MI…       59
 8 0446162697 ASSOCIATION OF UNIVERSITY PHYSI… CAHPS_GRP… CAHPS for MI…       85
 9 0446162697 ASSOCIATION OF UNIVERSITY PHYSI… CAHPS_GRP… CAHPS for MI…       83
10 0446162697 ASSOCIATION OF UNIVERSITY PHYSI… CAHPS_GRP… CAHPS for MI…       63
# ℹ 490 more rows
Code
cms_patient_experience |> 
  pivot_wider(
    names_from = measure_cd,
    values_from = prf_rate
  )
# A tibble: 500 × 9
   org_pac_id org_nm           measure_title CAHPS_GRP_1 CAHPS_GRP_2 CAHPS_GRP_3
   <chr>      <chr>            <chr>               <dbl>       <dbl>       <dbl>
 1 0446157747 USC CARE MEDICA… CAHPS for MI…          63          NA          NA
 2 0446157747 USC CARE MEDICA… CAHPS for MI…          NA          87          NA
 3 0446157747 USC CARE MEDICA… CAHPS for MI…          NA          NA          86
 4 0446157747 USC CARE MEDICA… CAHPS for MI…          NA          NA          NA
 5 0446157747 USC CARE MEDICA… CAHPS for MI…          NA          NA          NA
 6 0446157747 USC CARE MEDICA… CAHPS for MI…          NA          NA          NA
 7 0446162697 ASSOCIATION OF … CAHPS for MI…          59          NA          NA
 8 0446162697 ASSOCIATION OF … CAHPS for MI…          NA          85          NA
 9 0446162697 ASSOCIATION OF … CAHPS for MI…          NA          NA          83
10 0446162697 ASSOCIATION OF … CAHPS for MI…          NA          NA          NA
# ℹ 490 more rows
# ℹ 3 more variables: CAHPS_GRP_5 <dbl>, CAHPS_GRP_8 <dbl>, CAHPS_GRP_12 <dbl>
Code
cms_patient_experience |> 
  pivot_wider(
    id_cols = starts_with("org"),
    names_from = measure_cd,
    values_from = prf_rate
  )
# A tibble: 95 × 8
   org_pac_id org_nm CAHPS_GRP_1 CAHPS_GRP_2 CAHPS_GRP_3 CAHPS_GRP_5 CAHPS_GRP_8
   <chr>      <chr>        <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
 1 0446157747 USC C…          63          87          86          57          85
 2 0446162697 ASSOC…          59          85          83          63          88
 3 0547164295 BEAVE…          49          NA          75          44          73
 4 0749333730 CAPE …          67          84          85          65          82
 5 0840104360 ALLIA…          66          87          87          64          87
 6 0840109864 REX H…          73          87          84          67          91
 7 0840513552 SCL H…          58          83          76          58          78
 8 0941545784 GRITM…          46          86          81          54          NA
 9 1052612785 COMMU…          65          84          80          58          87
10 1254237779 OUR L…          61          NA          NA          65          NA
# ℹ 85 more rows
# ℹ 1 more variable: CAHPS_GRP_12 <dbl>
Code
df <- tribble(
  ~id, ~measurement, ~value,
  "A",        "bp1",    100,
  "B",        "bp1",    140,
  "B",        "bp2",    115, 
  "A",        "bp2",    120,
  "A",        "bp3",    105
)
df |> 
  pivot_wider(
    names_from = measurement,
    values_from = value
  )
# A tibble: 2 × 4
  id      bp1   bp2   bp3
  <chr> <dbl> <dbl> <dbl>
1 A       100   120   105
2 B       140   115    NA
Code
df |> 
  distinct(measurement) |> 
  pull()
[1] "bp1" "bp2" "bp3"
Code
df |> 
  select(-measurement, -value) |> 
  distinct()
# A tibble: 2 × 1
  id   
  <chr>
1 A    
2 B    
Code
df |> 
  select(-measurement, -value) |> 
  distinct() |> 
  mutate(x = NA, y = NA, z = NA)
# A tibble: 2 × 4
  id    x     y     z    
  <chr> <lgl> <lgl> <lgl>
1 A     NA    NA    NA   
2 B     NA    NA    NA   
Code
df <- tribble(
  ~id, ~measurement, ~value,
  "A",        "bp1",    100,
  "A",        "bp1",    102,
  "A",        "bp2",    120,
  "B",        "bp1",    140, 
  "B",        "bp2",    115
)
df |>
  pivot_wider(
    names_from = measurement,
    values_from = value
  )
# A tibble: 2 × 3
  id    bp1       bp2      
  <chr> <list>    <list>   
1 A     <dbl [2]> <dbl [1]>
2 B     <dbl [1]> <dbl [1]>
Code
df |> 
  group_by(id, measurement) |> 
  summarize(n = n(), .groups = "drop") |> 
  filter(n > 1)
# A tibble: 1 × 3
  id    measurement     n
  <chr> <chr>       <int>
1 A     bp1             2

2

Code
time_series_confirmed_long |> 
  group_by(Country_Region, Date) |> 
  summarise(Confirmed = sum(Confirmed), .groups = "drop") |> 
  filter(Country_Region %in% c("China", "France", "Italy", 
                               "Korea, South", "US")) |> 
  ggplot(aes(x = Date, y = Confirmed)) + 
    geom_point(color = "steelblue") +
    geom_line(color = "steelblue") +
    facet_wrap(~ Country_Region, scales = "free_y") +
    ggtitle("COVID-19 Confirmed Cases by Country") +
    theme_minimal()

3

Code
time_series_confirmed_long_daily |> 
    filter (Country_Region %in% c("US", "Canada", "Mexico", "Brazil", "Egypt")) |> 
    ggplot(aes(x = Date,  y = Daily, color = Country_Region)) + 
      geom_smooth() +
      ggtitle("COVID-19 Confirmed Cases")

4

Code
p <- time_series_deaths_long |>
  filter (Country_Region %in% c("US","Canada", "Mexico")) |>
  ggplot(aes(x=Country_Region, y=Confirmed, color= Country_Region)) + 
    geom_point(aes(size=Confirmed)) + 
    transition_time(Date) + 
    labs(title = "Cumulative Deaths: {frame_time}") + 
    ylab("Deaths") +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
# make the animation
animate(p, renderer = gifski_renderer(), end_pause = 15)

5

Code
time_series_confirmed_deaths_daily <- time_series_deaths_long |> 
    group_by(Country_Region, Date) |> 
    summarise(Confirmed = sum(Confirmed)) |> 
    mutate(Daily = Confirmed - lag(Confirmed, default = first(Confirmed )))

daily_counts_deaths <- time_series_confirmed_deaths_daily |> 
      filter (Country_Region %in% c("US","Canada", "Mexico"))

ggplot(daily_counts_deaths, aes(x = Date,  y = Daily, color = Country_Region)) + 
        geom_point() +
        ggtitle("Confirmed COVID-19 Deaths")

6

Code
graph <- time_series_confirmed_long_daily |> 
      filter (Country_Region == "US" |
                Country_Region == "Brazil" |
                Country_Region == "Canada" |
                Country_Region == "China" |
                Country_Region == "Egypt") |>
  ggplot(aes(x = Date,  y = Daily, color = Country_Region)) + 
        geom_point() +
        ggtitle("Confirmed COVID-19 Cases") +
# gganimate lines  
        geom_point(aes(group = seq_along(Date))) +
        transition_reveal(Date)

animate(graph, renderer = gifski_renderer(), end_pause = 15)